In [1]:
import pandas as pd
import altair as alt
In [2]:
url = 'https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/licenses_fall2022.csv'
data = pd.read_csv(url)
data.head()  # Display the first few rows to understand the data
Out[2]:
_id License Type Description License Number License Status Business Title First Name Middle Last Name ... Specialty/Qualifier Controlled Substance Schedule Delegated Controlled Substance Schedule Ever Disciplined LastModifiedDate Case Number Action Discipline Start Date Discipline End Date Discipline Reason
0 1189509 DETECTIVE BOARD PERMANENT EMPLOYEE REGISTRATION 129446286 NOT RENEWED N NaN EILEEN NaN SANTACRUZ ... NaN NaN NaN N 03/18/2022 NaN NaN NaN NaN NaN
1 801037 DETECTIVE BOARD FIREARM CONTROL CARD 229030294.0 NOT RENEWED N NaN DAGMAR J NORDLUND ... NaN NaN NaN N 08/16/2006 NaN NaN NaN NaN NaN
2 365129 COSMO LICENSED COSMETOLOGIST 11053076.0 NOT RENEWED N NaN RADOJE NaN ZELENOVIC ... NaN NaN NaN N 05/26/2006 NaN NaN NaN NaN NaN
3 595427 COSMO LICENSED COSMETOLOGIST 11295645.0 ACTIVE N NaN BECKY SUE L BURROUGHS ... NaN NaN NaN N 11/12/2021 NaN NaN NaN NaN NaN
4 653668 COSMO LICENSED NAIL TECHNICIAN 169006247 NOT RENEWED N NaN BILL G L LETNER ... NaN NaN NaN N 05/30/2006 NaN NaN NaN NaN NaN

5 rows × 31 columns

In [3]:
# Checking for missing values
print(data.isnull().sum())

data = data.dropna(subset=['License Status'])
data = data.drop(columns=['Middle', 'Suffix', 'Title'])  # Example of dropping columns


# Convert dates if needed
data['LastModifiedDate'] = pd.to_datetime(data['LastModifiedDate'])
_id                                            0
License Type                                   0
Description                                    0
License Number                                60
License Status                                 0
Business                                       0
Title                                       9890
First Name                                   395
Middle                                      6378
Last Name                                    395
Prefix                                      9997
Suffix                                      9590
Business Name                                  0
BusinessDBA                                 9885
Original Issue Date                            5
Effective Date                               792
Expiration Date                              500
City                                          11
State                                          0
Zip                                           71
County                                       411
Specialty/Qualifier                         9692
Controlled Substance Schedule               9791
Delegated Controlled Substance Schedule    10000
Ever Disciplined                               0
LastModifiedDate                               0
Case Number                                 9657
Action                                      9658
Discipline Start Date                       9657
Discipline End Date                         9861
Discipline Reason                           9713
dtype: int64
In [4]:
# Basic bar chart showing the count of licenses by License Type
alt.data_transformers.disable_max_rows()
chart1 = alt.Chart(data).mark_bar().encode(
    x=alt.X('License Type:N', title='License Type', sort='-y'),
    y=alt.Y('count():Q', title='Count of Licenses'),
    color='License Type:N'
).properties(
    title='Count of Licenses by Type',
    width=600,
    height=400
).configure_axis(
    labelAngle=-45
)

chart1
Out[4]:
In [16]:
filtered_data = data[data['License Status'].isin(['ACTIVE', 'EXPIRED', 'NOT RENEWED'])]  # Example statuses to focus on

filtered_chart = alt.Chart(filtered_data).mark_line(point=True).encode(
    x=alt.X('yearmonth(LastModifiedDate):T', title='Year-Month'),
    y=alt.Y('count():Q', title='Count of Licenses'),
    color='License Status:N',
    tooltip=['License Status', 'count()']
).properties(
    title='Filtered Count of Licenses by Status',
    width=700,
    height=400
).interactive()

filtered_chart
Out[16]:
In [ ]: